#encoding=utf-8
import scrapy
from news_spider.items import NewsSpiderItem
from pyquery import PyQuery
import json
import time
import re
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule
from scrapy.spiders import CrawlSpider
import datetime
import time
import iso8601
class XinzhiyuanSpider(CrawlSpider):
    """Crawl the Xinzhiyuan team pages on yq.aliyun.com and yield one
    NewsSpiderItem (time, title, url, content) per article page."""

    start_urls = ['https://yq.aliyun.com/teams/135?spm=5176.100239.blogcont292720.1.RZIySH']
    name = 'Xinzhiyuan'
    allowed_domains = ['aliyun.com']
    # Follow every article detail link (e.g. /articles/12345) and hand the
    # response to parseNews; keep following links found on those pages too.
    rules = (
        Rule(
            LinkExtractor(allow=r"/articles/\d+"),
            callback="parseNews",
            follow=True
        ),
    )

    def parseNews(self, response):
        """Parse one article page into a NewsSpiderItem.

        Yields the item, or nothing when the page lacks the expected
        markup (previously an unguarded ``[0]`` raised IndexError and
        aborted processing of that response).
        """
        # Local import: only needed for the tz-correct epoch conversion below.
        import calendar

        self.logger.info('A response from %s just arrived!', response.url)

        times = response.xpath("//span[@class='b-time']/text()").extract()
        titles = response.xpath("//title/text()").extract()
        contents = response.xpath("//div[@class='content-detail markdown-body']").extract()

        # Robustness: skip pages that do not match the expected article
        # layout instead of crashing on an empty extract() result.
        if not (times and titles and contents):
            self.logger.warning('Missing expected fields on %s; item skipped', response.url)
            return

        parsed = iso8601.parse_date(times[0])
        # BUG FIX: the original used time.mktime(parsed.timetuple()), which
        # discards the parsed UTC offset and reinterprets the wall-clock
        # fields as *local* time — a wrong epoch whenever the crawler's
        # timezone differs from the article's. calendar.timegm over the
        # UTC time tuple is timezone-independent and correct.
        timestamp = calendar.timegm(parsed.utctimetuple())

        item = NewsSpiderItem()
        item['time'] = timestamp
        item['title'] = PyQuery(titles[0]).text()
        item['url'] = response.url
        item['content'] = PyQuery(contents[0]).text()
        yield item
